# Global knitr chunk options: echo and evaluate code, hide messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  message = FALSE,
  warning = FALSE
)
Load packages:
library(readxl)
library(magrittr)
library(dplyr)
library(ggplot2)
library(ggmap)
library(ggthemes)
library(ggpubr)
library(ggforce)
library(tidyverse)
library(gmodels)
library(rgdal)
library(osmdata)
library(nominatim)
library(jsonlite)
library(RColorBrewer)
library(tidyr)
library(leaflet)
library(maps)
library(tigris)
library(tidytext)
library(textdata)
library(tm)
library(quanteda)
library(rvest)
library(stringr)
library(SnowballC)
library(wordcloud)
library(plotrix)
library(qdapDictionaries)
library(formattable)
library(stringr)
library(DT)
Import database:
# Read the Tesla sentiment-analysis CSV into `Tesla`.
# NOTE(review): the path is absolute and machine-specific.
Tesla <- read.csv(
  file = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/(update)TSLA_sentiment_analysis.csv"
)
Create and preprocess the text corpus (126 tweets)
# Build a tm corpus with one document per tweet.
# The document count is taken from the data rather than hard-coded, so the
# corpus stays aligned with `Tesla` if rows are added or removed.
n_tweets <- nrow(Tesla)
doc_id <- seq_len(n_tweets)
line <- rep(1, n_tweets)
# DataframeSource reads the `doc_id` and `text` columns; `line` is carried along.
text <- data.frame(doc_id, text = Tesla$tweet, line, stringsAsFactors = FALSE)
df_source <- DataframeSource(text)
df_corpus <- VCorpus(df_source)
#Clean text
#removefullycap<-function(x){gsub("[A-Z][A-Z]+", " ", x)}
#removeemoji<-function(x){gsub("[^\x01-\x7F]", "", x)}
# Run the active tm cleaning steps over a corpus and return the cleaned corpus.
# Active steps, in order: remove numbers, remove punctuation, strip whitespace.
clean_corpus <- function(corpus) {
  # Steps currently switched off (kept for reference):
  # corpus <- tm_map(corpus, content_transformer(removefullycap))
  # corpus <- tm_map(corpus, content_transformer(removeemoji))
  # corpus <- tm_map(corpus, content_transformer(tolower))
  # corpus <- tm_map(corpus, removeWords, c(stopwords("en")))
  cleaning_steps <- list(removeNumbers, removePunctuation, stripWhitespace)
  for (step in cleaning_steps) {
    corpus <- tm_map(corpus, step)
  }
  corpus
}
# Clean the tweet corpus, then build its document-term matrix.
corpus_clean <- clean_corpus(df_corpus)
corpus_dtm <- DocumentTermMatrix(corpus_clean)

# Dense matrix copy of the document-term matrix.
corpus_m <- as.matrix(corpus_dtm)

# Print the document-term matrix summary.
corpus_dtm
<<DocumentTermMatrix (documents: 126, terms: 1572)>>
Non-/sparse entries: 3647/194425
Sparsity : 98%
Maximal term length: 17
Weighting : term frequency (tf)
# Dimensions of the dense document-term matrix (documents x terms).
dim(corpus_m)
[1] 126 1572
Calculate TF-IDF scores for the tweets.
# Convert the document-term matrix into a tidy table.
corpustd <- tidy(corpus_dtm)

# Add tf, idf and tf_idf for every (term, document) count, then sort rows
# from the highest tf_idf downwards.
corpustf_idf <- arrange(
  bind_tf_idf(corpustd, term, document, count),
  desc(tf_idf)
)
corpustf_idf
Word cloud for Tesla CEO’s tweets
# Palette: the 10-colour "PuOr" scheme with its first two colours dropped.
purple_orange <- brewer.pal(10, "PuOr")[-(1:2)]

# Fixed seed so the word layout is reproducible.
set.seed(2200)
wordcloud(
  words = corpustf_idf$term,
  freq = corpustf_idf$tf,
  max.words = 100,
  colors = purple_orange
)
Calculate the tone of each text based on the positive and negative words that are being used in the tweets.
First build the sentiment function. Obtain the list of positive.words and negative.words from the sentiment dictionary of Hu & Liu (2004) using the qdapDictionaries package.
# Tone of a text from dictionary hits:
#   (positive hits - negative hits) / (positive hits + negative hits)
#
# words: character input; only the first tokenised document is scored.
# Returns a number between -1 and 1, or NaN (0 / 0) when no token appears in
# either `positive.words` or `negative.words`.
sentiment <- function(words = c("really great good stuff bad")) {
  first_doc <- tokens(words)[[1]]
  n_pos <- sum(first_doc %in% positive.words)
  n_neg <- sum(first_doc %in% negative.words)
  (n_pos - n_neg) / (n_pos + n_neg)
}
Apply the function to the text of the 126 tweets.
# Score the tone of every tweet and store it in a `toneofdocument` column.
# The score is computed for all rows of the data (no hard-coded row count),
# and the former no-op self-assignment has been removed.
toneofdocument <- Tesla
toneofdocument$toneofdocument <- vapply(
  toneofdocument$tweet,
  sentiment,
  numeric(1),
  USE.NAMES = FALSE
)
toneofdocument

# Save the scored data next to the input file.
write.csv(toneofdocument, "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Tesla tone of document.csv", row.names = FALSE)
Visualize the relationship between tone of the document and daily stock price change.
# Jittered scatter of tone score against daily stock price change.
# coord_flip() swaps the axes when the plot is drawn.
plot <- ggplot(toneofdocument, aes(x = toneofdocument, y = DailyChange)) +
  geom_jitter(color = "#00CED1") +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(hjust = 0.5, angle = 45, size = 4.5)
  ) +
  labs(
    x = "Tone of Document",
    y = "Daily stock price change",
    title = "Relationship between Tone of Document and Daily Stock Price Change"
  ) +
  coord_flip()
plot
Ignore all documents that do not have words contained within the Hu & Liu dictionary and all neutral scores. Collapse the positive and negative texts into two larger documents.
# Tweets with a strictly positive tone, collapsed into one long string.
# Rows whose tone is NaN or exactly 0 fail both comparisons and are dropped.
positivetone <- select(filter(toneofdocument, toneofdocument > 0), tweet)
positivetext <- paste(unlist(positivetone$tweet), collapse = " ")

# Tweets with a strictly negative tone, collapsed the same way.
negativetone <- select(filter(toneofdocument, toneofdocument < 0), tweet)
negativetext <- paste(unlist(negativetone$tweet), collapse = " ")
Generate comparison cloud showing the most-frequent positive and negative words, where document 1 denotes positive texts, and document 2 denotes negative texts.
# Build a two-document corpus (positive text, negative text) and draw a
# comparison cloud of the words that distinguish the two.
doc_id <- c(
  "Frequent words in positive sentiment tweets",
  "Frequent words in negative sentiment tweets"
)
line <- c(1, 1)
comparisontext <- data.frame(
  doc_id,
  text = c(positivetext, negativetext),
  line,
  stringsAsFactors = FALSE
)
df_source <- DataframeSource(comparisontext)
df_corpus <- VCorpus(df_source)
corpuscomparison_clean <- clean_corpus(df_corpus)

# Term-document matrix: one column per document, as comparison.cloud expects.
corpuscomparison_tdm <- TermDocumentMatrix(corpuscomparison_clean)
corpuscomparison_m <- as.matrix(corpuscomparison_tdm)

set.seed(2105)
# `scale` is (largest, smallest) word size. It was previously c(0.1, 2), which
# drew the most distinctive words smallest; the order is now corrected.
comparison.cloud(
  corpuscomparison_m,
  colors = c("orange", "purple"),
  scale = c(2, 0.1),
  title.size = 1,
  max.words = 100
)
Identify emotions (anger, anticipation, disgust, fear, joy, sadness, surprise, trust) as well as negative and positive sentiment using the NRC Word-Emotion Association Lexicon, accessed through the tidytext package.
# Load the NRC lexicon once and split it into one table per category,
# instead of calling get_sentiments("nrc") separately for each category.
nrc_lexicon <- get_sentiments("nrc")

# Rows of the NRC lexicon whose `sentiment` column equals `nrc_category`.
nrc_words_for <- function(nrc_category) {
  filter(nrc_lexicon, sentiment == nrc_category)
}

nrc_anger <- nrc_words_for("anger")
nrc_anticipation <- nrc_words_for("anticipation")
nrc_disgust <- nrc_words_for("disgust")
nrc_fear <- nrc_words_for("fear")
nrc_joy <- nrc_words_for("joy")
nrc_sadness <- nrc_words_for("sadness")
nrc_surprise <- nrc_words_for("surprise")
nrc_trust <- nrc_words_for("trust")
nrc_negative <- nrc_words_for("negative")
nrc_positive <- nrc_words_for("positive")

# Print the anger word list as a sanity check.
nrc_anger$word
[1] "abandoned" "abandonment" "abhor" "abhorrent" "abolish" "abomination"
[7] "abuse" "accursed" "accusation" "accused" "accuser" "accusing"
[13] "actionable" "adder" "adversary" "adverse" "adversity" "advocacy"
[19] "affront" "aftermath" "aggravated" "aggravating" "aggravation" "aggression"
[25] "aggressive" "aggressor" "agitated" "agitation" "agony" "alcoholism"
[31] "alienate" "alienation" "allegation" "altercation" "ambush" "anarchism"
[37] "anarchist" "anarchy" "anathema" "anger" "angry" "anguish"
[43] "animosity" "animus" "annihilate" "annihilated" "annihilation" "annoy"
[49] "annoyance" "annoying" "antagonism" "antagonist" "antagonistic" "antichrist"
[55] "antipathy" "antisocial" "antithesis" "anxiety" "argue" "argument"
[61] "argumentation" "arguments" "armament" "armed" "arraignment" "arrogant"
[67] "arson" "assail" "assailant" "assassin" "assassinate" "assassination"
[73] "assault" "asshole" "atrocious" "atrocity" "attack" "attacking"
[79] "attorney" "avarice" "avenger" "averse" "aversion" "awful"
[85] "backbone" "bad" "badger" "badness" "bane" "bang"
[91] "banger" "banish" "banished" "banishment" "bankruptcy" "banshee"
[97] "barb" "barbaric" "bark" "barrier" "bastion" "battalion"
[103] "batter" "battery" "battle" "battled" "bayonet" "bear"
[109] "bearish" "beast" "beating" "bee" "belittle" "belligerent"
[115] "bellows" "belt" "berserk" "betray" "betrayal" "bias"
[121] "bickering" "bigot" "bigoted" "bile" "birch" "birthplace"
[127] "bitch" "bitterly" "bitterness" "blackmail" "blame" "blasphemous"
[133] "blasphemy" "blast" "blatant" "blaze" "blemish" "blockade"
[139] "bloodshed" "bloodthirsty" "bloody" "bogus" "boisterous" "bomb"
[145] "bombard" "bombardment" "bothering" "bout" "boxing" "brawl"
[151] "brazen" "brimstone" "broil" "broken" "brunt" "brutal"
[157] "brutality" "brute" "buffet" "bugaboo" "bully" "bummer"
[163] "burial" "burke" "busted" "butcher" "cacophony" "cad"
[169] "callous" "campaigning" "cancer" "cane" "canker" "cannon"
[175] "carelessness" "carnage" "cash" "casualty" "catastrophe" "caution"
[181] "celebrity" "censor" "chaff" "challenge" "chant" "chaos"
[187] "chaotic" "cheat" "choke" "claimant" "clamor" "clash"
[193] "clashing" "claw" "coerce" "coercion" "coldness" "collision"
[199] "collusion" "combat" "combatant" "combative" "commotion" "communism"
[205] "complain" "complaint" "complicate" "compress" "compulsion" "concealment"
[211] "concussion" "condemn" "condemnation" "condescension" "confine" "confined"
[217] "confinement" "confiscate" "conflagration" "conflict" "confront" "confusion"
[223] "conquest" "conspirator" "consternation" "constraint" "contempt" "contemptible"
[229] "contemptuous" "contentious" "contraband" "contradict" "controversial" "convict"
[235] "coop" "copycat" "corrupting" "counsellor" "coup" "court"
[241] "crabby" "cracked" "cranky" "crazed" "crazy" "crime"
[247] "criminal" "criminality" "criticism" "criticize" "cross" "crucifixion"
[253] "cruel" "cruelly" "cruelty" "crunch" "crusade" "crushed"
[259] "crushing" "cur" "curse" "cursed" "cursing" "cussed"
[265] "cutthroat" "cutting" "dabbling" "daemon" "damage" "dame"
[271] "damn" "damnation" "darkness" "dashed" "dastardly" "deadly"
[277] "death" "deceit" "deceive" "deceived" "decry" "defamatory"
[283] "defect" "defendant" "defense" "defiance" "defiant" "deflate"
[289] "defraud" "defy" "degeneracy" "delay" "deleterious" "delinquent"
[295] "delusion" "delusional" "demand" "demolish" "demon" "demonic"
[301] "denounce" "denunciation" "deny" "deplorable" "deplore" "deportation"
[307] "depraved" "depravity" "depreciate" "depreciated" "depressed" "deprivation"
[313] "deranged" "derision" "derogation" "derogatory" "desecration" "desert"
[319] "deserted" "deserve" "desist" "despair" "despicable" "despise"
[325] "despotism" "destroyed" "destroyer" "destroying" "destruction" "destructive"
[331] "detainee" "deterioration" "detest" "detonation" "detract" "devastate"
[337] "devastating" "devastation" "devil" "diabolical" "diatribe" "dictatorial"
[343] "dictatorship" "difficulty" "disagree" "disagreeing" "disagreement" "disallowed"
[349] "disappoint" "disappointed" "disapprove" "disapproved" "disapproving" "disaster"
[355] "disastrous" "disclaim" "discontent" "discord" "discriminate" "discrimination"
[361] "disdain" "disease" "disfigured" "disgrace" "disgraced" "disgraceful"
[367] "disgruntled" "disgust" "disgusting" "dishonest" "dishonor" "disillusionment"
[373] "disinformation" "dislike" "disliked" "dislocated" "dismay" "dismissal"
[379] "disobedience" "disobedient" "disobey" "disparage" "disparaging" "disparity"
[385] "displaced" "displeased" "dispossessed" "dispute" "disqualified" "disreputable"
[391] "disrespect" "disrespectful" "disruption" "dissension" "disservice" "dissident"
[397] "dissolution" "dissonance" "distracted" "distracting" "distress" "distressing"
[403] "distrust" "disturbance" "disturbed" "disused" "divorce" "dominate"
[409] "domination" "doomsday" "dreadful" "duel" "dumps" "dupe"
[415] "duplicity" "duress" "dying" "earthquake" "effigy" "egregious"
[421] "elbow" "elf" "elimination" "encumbrance" "endless" "enemy"
[427] "enforce" "enmity" "enslaved" "entangled" "epidemic" "eradicate"
[433] "eradication" "erupt" "eruption" "escalate" "eschew" "evade"
[439] "eviction" "evil" "exacerbation" "exaggerate" "exasperation" "excitation"
[445] "excite" "execution" "executioner" "exile" "expel" "expletive"
[451] "explode" "explosive" "expulsion" "extermination" "extinguish" "failing"
[457] "fallacious" "falsehood" "falsification" "fatal" "fear" "fee"
[463] "feeling" "fenced" "ferocious" "ferocity" "fervor" "feud"
[469] "feudalism" "fib" "fiend" "fierce" "fight" "fighting"
[475] "firearms" "fits" "flagrant" "fleece" "flog" "fluctuation"
[481] "foe" "foray" "forbidding" "force" "forcibly" "forearm"
[487] "forfeit" "forsaken" "foul" "fraud" "fraudulent" "frenetic"
[493] "frenzied" "friction" "frightful" "frowning" "frustrate" "frustrated"
[499] "frustration" "fugitive" "fuming" "furious" "furiously" "furnace"
[505] "furor" "fury" "fuss" "gall" "gallows" "gang"
[511] "gent" "gibberish" "glare" "glaring" "gnome" "godless"
[517] "gonorrhea" "gore" "gory" "grab" "grated" "grating"
[523] "greed" "grievance" "grievous" "grim" "grope" "growl"
[529] "growling" "grudge" "gruff" "grumble" "grumpy" "guillotine"
[535] "guilty" "gun" "halter" "hamstring" "hanging" "harass"
[541] "harassing" "harbinger" "hardened" "harmful" "harry" "harshness"
[547] "hate" "hateful" "hating" "hatred" "haughty" "havoc"
[553] "hell" "hellish" "hiss" "hit" "hoax" "holocaust"
[559] "homeless" "homicidal" "homicide" "honest" "hood" "hoot"
[565] "hopelessness" "horrible" "horrid" "horrific" "horror" "hostage"
[571] "hostile" "hostilities" "hostility" "hot" "howl" "huff"
[577] "humbug" "humiliate" "hunting" "hurt" "hurtful" "hurting"
[583] "hysterical" "idiocy" "idiotic" "ill" "illegal" "illegality"
[589] "illegitimate" "illicit" "immaturity" "immoral" "immorality" "impermeable"
[595] "implicate" "impotence" "imprisoned" "imprisonment" "inadmissible" "inappropriate"
[601] "inattention" "incarceration" "incase" "incendiary" "incense" "incest"
[607] "incite" "incompatible" "incompetent" "incongruous" "inconsiderate" "inconvenient"
[613] "incredulous" "incurable" "indecency" "indenture" "indict" "indifference"
[619] "indignant" "indignation" "indoctrination" "inept" "inequality" "inexcusable"
[625] "infamous" "infanticide" "infantile" "inferno" "infidel" "infidelity"
[631] "inflict" "infraction" "inhibit" "inhuman" "inimical" "injure"
[637] "injurious" "injury" "injustice" "inoperative" "insane" "insanity"
[643] "insecure" "insidious" "insignificant" "instinctive" "insufficiency" "insult"
[649] "insulting" "insurrection" "intense" "interminable" "interrupt" "intimidation"
[655] "intolerable" "intolerance" "intolerant" "intractable" "intruder" "intrusive"
[661] "invade" "invader" "invasion" "involution" "involvement" "irate"
[667] "ire" "irreconcilable" "irritability" "irritable" "irritating" "irritation"
[673] "jab" "jealous" "jealousy" "jeopardize" "jerk" "kick"
[679] "kicking" "kidnap" "killing" "lace" "lagging" "lash"
[685] "latent" "lava" "lawlessness" "lawsuit" "lawyer" "legalized"
[691] "leukemia" "libel" "liberate" "lie" "lightning" "limited"
[697] "liquor" "litigate" "litigious" "livid" "loath" "loathe"
[703] "loathsome" "lonely" "lose" "losing" "loss" "loudness"
[709] "lunacy" "lunatic" "lying" "lynch" "mad" "madden"
[715] "madman" "madness" "malevolent" "malice" "malicious" "malign"
[721] "malignant" "malpractice" "mangle" "maniac" "manipulation" "manslaughter"
[727] "martial" "masochism" "massacre" "mastery" "meddle" "melodrama"
[733] "menace" "menacing" "mighty" "militia" "misbehavior" "misconception"
[739] "miserable" "misery" "mislead" "misleading" "misplace" "misrepresented"
[745] "misstatement" "mistress" "misunderstanding" "mob" "mocking" "molestation"
[751] "money" "monstrosity" "moody" "moral" "morals" "morbidity"
[757] "mortality" "mosque" "mosquito" "mournful" "muff" "mug"
[763] "mule" "murder" "murderer" "murderous" "musical" "mutilation"
[769] "mutiny" "mutter" "myopia" "nag" "nasty" "negation"
[775] "neglected" "nepotism" "nether" "nettle" "noisy" "noncompliance"
[781] "notoriety" "nuisance" "nurture" "objection" "obliging" "obliterate"
[787] "obliterated" "oblivion" "obnoxious" "obscenity" "obstacle" "obstruct"
[793] "obstructive" "odious" "offend" "offended" "offender" "offense"
[799] "offensive" "onerous" "opera" "opinionated" "opium" "opponent"
[805] "opposed" "opposition" "oppress" "oppression" "oppressive" "oppressor"
[811] "orc" "orchestra" "ordeal" "oust" "outburst" "outcry"
[817] "outrage" "overbearing" "overpowering" "overpriced" "owing" "painful"
[823] "paralysis" "paralyzed" "pare" "patter" "paucity" "payback"
[829] "penalty" "penetration" "penitentiary" "perdition" "pernicious" "perpetrator"
[835] "persecute" "persecution" "perverse" "perversion" "pervert" "pessimism"
[841] "pest" "phony" "picket" "picketing" "pillage" "pique"
[847] "pirate" "pitfall" "playful" "plunder" "poaching" "poison"
[853] "poisoned" "poisonous" "polemic" "politics" "possessed" "possession"
[859] "pound" "poverty" "pow" "powerful" "powerless" "preclude"
[865] "prejudice" "prejudicial" "presumptuous" "pretending" "prick" "prison"
[871] "prisoner" "profane" "profanity" "prohibited" "prosecute" "provocation"
[877] "provoking" "pry" "psychosis" "punch" "punished" "punishing"
[883] "punishment" "punitive" "quandary" "quarrel" "rabble" "rabid"
[889] "rage" "raging" "raid" "rail" "ram" "rampage"
[895] "ransom" "rape" "rapping" "rascal" "rating" "rave"
[901] "ravenous" "raving" "react" "rebel" "rebellion" "recalcitrant"
[907] "recession" "recidivism" "reckless" "recklessness" "reject" "rejection"
[913] "rejects" "remand" "remiss" "remove" "renegade" "renounce"
[919] "repay" "repellent" "reprimand" "reprisal" "reproach" "repudiation"
[925] "resent" "resentful" "resentment" "resign" "resistance" "resisting"
[931] "restitution" "restrain" "restriction" "retaliate" "retaliation" "retaliatory"
[937] "retract" "retribution" "revenge" "reversal" "revoke" "revolt"
[943] "revolting" "revolution" "revolver" "revulsion" "rheumatism" "ribbon"
[949] "ridicule" "ridiculous" "rifle" "ringer" "riot" "riotous"
[955] "rivalry" "rob" "robbery" "rocket" "rook" "row"
[961] "ruined" "ruinous" "ruthless" "saber" "sabotage" "saloon"
[967] "sarcasm" "satanic" "savage" "savagery" "scandalous" "scapegoat"
[973] "scar" "scarcity" "scare" "schism" "schizophrenia" "scoff"
[979] "scold" "scolding" "scorching" "scorn" "scorpion" "scoundrel"
[985] "scourge" "scrapie" "scream" "screaming" "screwed" "sectarian"
[991] "sedition" "segregate" "selfish" "senseless" "sentence" "separatist"
[997] "shackle" "shaky" "sham" "sharpen"
[ reached getOption("max.print") -- omitted 247 entries ]
# Share of the tokens in a text that appear in a given word list.
#
# words:         character input; only the first tokenised document is used.
# lexicon_words: character vector of words to count.
# Returns matches / total tokens (NaN when the text has no tokens).
emotion_share <- function(words, lexicon_words) {
  first_doc <- tokens(words)[[1]]
  sum(first_doc %in% lexicon_words) / length(first_doc)
}

# One wrapper per NRC category. Each returns the share of tokens in `words`
# that belong to that category's word list.

# Share of anger words.
angryf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_anger$word)
}

# Share of anticipation words.
anticipationf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_anticipation$word)
}

# Share of disgust words.
disgustf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_disgust$word)
}

# Share of fear words.
fearf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_fear$word)
}

# Share of joy words.
joyf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_joy$word)
}

# Share of sadness words.
# Fixed: this previously counted the surprise word list.
sadnessf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_sadness$word)
}

# Share of surprise words.
# Fixed: this previously counted the sadness word list.
surprisef <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_surprise$word)
}

# Share of trust words.
trustf <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_trust$word)
}

# Share of negative words.
negativef <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_negative$word)
}

# Share of positive words.
positivef <- function(words = c("Check out the frequency of words that represent emotions")) {
  emotion_share(words, nrc_positive$word)
}
# Score every tweet on each emotion and add log(score + 1) versions.
# Columns are built a whole column at a time over all rows of the data
# (no hard-coded row count); column order matches the previous loop:
# the ten raw scores first, then the ten "l"-prefixed log scores.
emotionsfrequency <- Tesla

# Column name -> scoring function.
emotion_scorers <- list(
  angryf = angryf,
  anticipationf = anticipationf,
  disgustf = disgustf,
  fearf = fearf,
  joyf = joyf,
  sadnessf = sadnessf,
  surprisef = surprisef,
  trustf = trustf,
  negativef = negativef,
  positivef = positivef
)

for (score_name in names(emotion_scorers)) {
  emotionsfrequency[[score_name]] <- vapply(
    emotionsfrequency$tweet,
    emotion_scorers[[score_name]],
    numeric(1),
    USE.NAMES = FALSE
  )
}

# The + 1 keeps zero frequencies finite under the log.
for (score_name in names(emotion_scorers)) {
  emotionsfrequency[[paste0("l", score_name)]] <-
    log(emotionsfrequency[[score_name]] + 1)
}

emotionsfrequency

# Save the scored data next to the input file.
write.csv(emotionsfrequency, "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Tesla frequency of emotions.csv", row.names = FALSE)
# Jittered scatter of one log emotion-frequency column against DailyChange,
# with a red least-squares line.
#
# log_column:   name of the log-frequency column to put on the x axis.
# emotion_word: lower-case emotion name used in the axis label; its
#               capitalised form is used in the title.
# data:         data frame holding `log_column` and `DailyChange`.
# Returns the ggplot object.
emotion_scatter <- function(log_column, emotion_word, data = emotionsfrequency) {
  emotion_title <- paste0(
    toupper(substring(emotion_word, 1, 1)),
    substring(emotion_word, 2)
  )
  ggplot(data, aes(x = .data[[log_column]], y = DailyChange)) +
    geom_jitter(color = "#00CED1") +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    theme_bw() +
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5),
      axis.text.x = element_text(hjust = 0.5)
    ) +
    labs(
      x = paste0("Frequency of ", emotion_word, " words in the text (Log)"),
      y = "Daily stock price change",
      title = paste0(
        "Relationship between ", emotion_title,
        " emotions and Daily Stock Price Change"
      )
    )
}

plotangry <- emotion_scatter("langryf", "angry")
plotangry
#ggsave('Angry-linear.png')

plotanticipation <- emotion_scatter("lanticipationf", "anticipation")
plotanticipation
#ggsave('Anticipation-linear.png')

plotdisgust <- emotion_scatter("ldisgustf", "disgust")
plotdisgust
#ggsave('Disgust-linear.png')

plotjoy <- emotion_scatter("ljoyf", "joy")
plotjoy
#ggsave('Joy-linear.png')

plotfear <- emotion_scatter("lfearf", "fear")
plotfear
#ggsave('Fear-linear.png')

plotsadness <- emotion_scatter("lsadnessf", "sadness")
plotsadness
#ggsave('Sadness-linear.png')

# Fixed: the x-axis label of this plot previously read "sadness".
plotsurprise <- emotion_scatter("lsurprisef", "surprise")
plotsurprise
#ggsave('Surprise-linear.png')

plottrust <- emotion_scatter("ltrustf", "trust")
plottrust
#ggsave('Trust-linear.png')

plotnegative <- emotion_scatter("lnegativef", "negative")
plotnegative
#ggsave('Negative-linear.png')

plotpositive <- emotion_scatter("lpositivef", "positive")
plotpositive
#ggsave('Positive-linear.png')
Linear regression
# Coerce the response to numeric, then regress it on all ten emotion
# frequency columns at once and print the fit summary.
# NOTE: the fitted model is stored under the name `lm`, the same name as
# stats::lm(). Later `lm(...)` calls still reach the function because R skips
# non-function bindings when resolving a call; the name is kept because a
# later stargazer() call refers to it.
emotionsfrequency[["DailyChange"]] <- as.numeric(emotionsfrequency[["DailyChange"]])
lm <- lm(
  formula = DailyChange ~ angryf + anticipationf + disgustf + fearf + joyf +
    sadnessf + surprisef + trustf + negativef + positivef,
  data = emotionsfrequency
)
summary(lm)
Call:
lm(formula = DailyChange ~ angryf + anticipationf + disgustf +
fearf + joyf + sadnessf + surprisef + trustf + negativef +
positivef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.071299 -0.005739 -0.000184 0.005641 0.069719
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001005 0.003243 0.310 0.7573
angryf -0.010694 0.033506 -0.319 0.7502
anticipationf 0.055052 0.034698 1.587 0.1154
disgustf -0.118751 0.053738 -2.210 0.0292 *
fearf 0.003373 0.040620 0.083 0.9340
joyf -0.048200 0.043275 -1.114 0.2678
sadnessf 0.009555 0.056688 0.169 0.8665
surprisef 0.057796 0.049961 1.157 0.2498
trustf -0.024241 0.025739 -0.942 0.3483
negativef 0.012257 0.033866 0.362 0.7181
positivef 0.003699 0.024363 0.152 0.8796
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.01639 on 111 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.08138, Adjusted R-squared: -0.001374
F-statistic: 0.9834 on 10 and 111 DF, p-value: 0.4621
# Same multi-predictor regression, using the l-prefixed frequency columns.
llm <- lm(
  formula = DailyChange ~ langryf + lanticipationf + ldisgustf + lfearf +
    ljoyf + lsadnessf + lsurprisef + ltrustf + lnegativef + lpositivef,
  data = emotionsfrequency
)
summary(llm)
Call:
lm(formula = DailyChange ~ langryf + lanticipationf + ldisgustf +
lfearf + ljoyf + lsadnessf + lsurprisef + ltrustf + lnegativef +
lpositivef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.070962 -0.005960 -0.000026 0.005853 0.069116
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0008243 0.0033894 0.243 0.8083
langryf -0.0133337 0.0370231 -0.360 0.7194
lanticipationf 0.0616512 0.0375109 1.644 0.1031
ldisgustf -0.1303422 0.0597663 -2.181 0.0313 *
lfearf 0.0054824 0.0443386 0.124 0.9018
ljoyf -0.0478190 0.0471451 -1.014 0.3126
lsadnessf 0.0081724 0.0590289 0.138 0.8901
lsurprisef 0.0593145 0.0542168 1.094 0.2763
ltrustf -0.0227729 0.0293115 -0.777 0.4389
lnegativef 0.0142355 0.0372961 0.382 0.7034
lpositivef 0.0012925 0.0277854 0.047 0.9630
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.01641 on 111 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.07963, Adjusted R-squared: -0.003291
F-statistic: 0.9603 on 10 and 111 DF, p-value: 0.482
# Ensure the response is numeric, then fit DailyChange on angryf alone and
# print the fit summary.
emotionsfrequency[["DailyChange"]] <- as.numeric(emotionsfrequency[["DailyChange"]])
lmangry <- lm(
  formula = DailyChange ~ angryf,
  data = emotionsfrequency
)
summary(lmangry)
Call:
lm(formula = DailyChange ~ angryf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075182 -0.005894 0.000326 0.005573 0.078318
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001882 0.001647 1.143 0.255
angryf -0.001529 0.025035 -0.061 0.951
Residual standard error: 0.01645 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 3.107e-05, Adjusted R-squared: -0.008302
F-statistic: 0.003728 on 1 and 120 DF, p-value: 0.9514
# Ensure the response is numeric, then fit DailyChange on langryf alone and
# print the fit summary.
emotionsfrequency[["DailyChange"]] <- as.numeric(emotionsfrequency[["DailyChange"]])
llmangry <- lm(
  formula = DailyChange ~ langryf,
  data = emotionsfrequency
)
summary(llmangry)
Call:
lm(formula = DailyChange ~ langryf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075244 -0.005944 0.000353 0.005706 0.078256
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001944 0.001672 1.163 0.247
langryf -0.003967 0.028906 -0.137 0.891
Residual standard error: 0.01645 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0001569, Adjusted R-squared: -0.008175
F-statistic: 0.01884 on 1 and 120 DF, p-value: 0.8911
# Single-predictor fit of DailyChange on anticipationf; print the summary.
lmanticipation <- lm(
  formula = DailyChange ~ anticipationf,
  data = emotionsfrequency
)
summary(lmanticipation)
Call:
lm(formula = DailyChange ~ anticipationf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.073232 -0.005955 0.000723 0.005658 0.077430
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.754e-05 1.995e-03 -0.034 0.973
anticipationf 3.122e-02 2.197e-02 1.421 0.158
Residual standard error: 0.01631 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.01655, Adjusted R-squared: 0.008353
F-statistic: 2.019 on 1 and 120 DF, p-value: 0.1579
# Single-predictor fit of DailyChange on lanticipationf; print the summary.
llmanticipation <- lm(
  formula = DailyChange ~ lanticipationf,
  data = emotionsfrequency
)
summary(llmanticipation)
Call:
lm(formula = DailyChange ~ lanticipationf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.072841 -0.005816 0.000733 0.005788 0.077179
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.0004587 0.0020562 -0.223 0.824
lanticipationf 0.0399860 0.0249577 1.602 0.112
Residual standard error: 0.01627 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.02094, Adjusted R-squared: 0.01278
F-statistic: 2.567 on 1 and 120 DF, p-value: 0.1118
# Single-predictor fit of DailyChange on disgustf; print the summary.
lmdisgust <- lm(
  formula = DailyChange ~ disgustf,
  data = emotionsfrequency
)
summary(lmdisgust)
Call:
lm(formula = DailyChange ~ disgustf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074364 -0.005555 0.000350 0.005625 0.077750
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002450 0.001518 1.614 0.109
disgustf -0.048521 0.029303 -1.656 0.100
Residual standard error: 0.01626 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.02234, Adjusted R-squared: 0.01419
F-statistic: 2.742 on 1 and 120 DF, p-value: 0.1004
# Single-predictor fit of DailyChange on ldisgustf; print the summary.
llmdisgust <- lm(
  formula = DailyChange ~ ldisgustf,
  data = emotionsfrequency
)
summary(llmdisgust)
Call:
lm(formula = DailyChange ~ ldisgustf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074201 -0.005650 0.000312 0.005587 0.077712
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002488 0.001527 1.629 0.106
ldisgustf -0.056354 0.034959 -1.612 0.110
Residual standard error: 0.01627 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0212, Adjusted R-squared: 0.01304
F-statistic: 2.599 on 1 and 120 DF, p-value: 0.1096
# Single-predictor fit of DailyChange on fearf; print the summary.
lmfear <- lm(
  formula = DailyChange ~ fearf,
  data = emotionsfrequency
)
summary(lmfear)
Call:
lm(formula = DailyChange ~ fearf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075584 -0.005806 0.000325 0.005954 0.079016
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002284 0.001723 1.326 0.187
fearf -0.012097 0.023666 -0.511 0.610
Residual standard error: 0.01643 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.002173, Adjusted R-squared: -0.006143
F-statistic: 0.2613 on 1 and 120 DF, p-value: 0.6102
# Single-predictor fit of DailyChange on lfearf; print the summary.
llmfear <- lm(
  formula = DailyChange ~ lfearf,
  data = emotionsfrequency
)
summary(llmfear)
Call:
lm(formula = DailyChange ~ lfearf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075475 -0.005804 0.000312 0.005948 0.078872
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002175 0.001758 1.237 0.219
lfearf -0.009741 0.027172 -0.358 0.721
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.00107, Adjusted R-squared: -0.007255
F-statistic: 0.1285 on 1 and 120 DF, p-value: 0.7206
# Single-predictor fit of DailyChange on joyf; print the summary.
lmjoy <- lm(
  formula = DailyChange ~ joyf,
  data = emotionsfrequency
)
summary(lmjoy)
Call:
lm(formula = DailyChange ~ joyf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075065 -0.005687 0.000326 0.005356 0.078557
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001643 0.001823 0.901 0.369
joyf 0.004248 0.022790 0.186 0.852
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0002894, Adjusted R-squared: -0.008042
F-statistic: 0.03474 on 1 and 120 DF, p-value: 0.8525
# Single-predictor fit of DailyChange on ljoyf; print the summary.
llmjoy <- lm(
  formula = DailyChange ~ ljoyf,
  data = emotionsfrequency
)
summary(llmjoy)
Call:
lm(formula = DailyChange ~ ljoyf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075023 -0.005829 0.000259 0.005307 0.078693
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001507 0.001872 0.805 0.422
ljoyf 0.007659 0.026146 0.293 0.770
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0007146, Adjusted R-squared: -0.007613
F-statistic: 0.08581 on 1 and 120 DF, p-value: 0.7701
# Single-predictor fit of DailyChange on sadnessf; print the summary.
lmsadness <- lm(
  formula = DailyChange ~ sadnessf,
  data = emotionsfrequency
)
summary(lmsadness)
Call:
lm(formula = DailyChange ~ sadnessf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074977 -0.005841 0.000223 0.005648 0.078523
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001677 0.001767 0.949 0.345
sadnessf 0.007122 0.041773 0.170 0.865
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0002422, Adjusted R-squared: -0.008089
F-statistic: 0.02907 on 1 and 120 DF, p-value: 0.8649
# Single-predictor fit of DailyChange on lsadnessf; print the summary.
llmsadness <- lm(
  formula = DailyChange ~ lsadnessf,
  data = emotionsfrequency
)
summary(llmsadness)
Call:
lm(formula = DailyChange ~ lsadnessf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074940 -0.005820 0.000260 0.005638 0.078560
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001640 0.001783 0.920 0.359
lsadnessf 0.009051 0.044612 0.203 0.840
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0003429, Adjusted R-squared: -0.007988
F-statistic: 0.04116 on 1 and 120 DF, p-value: 0.8396
# Single-predictor fit of DailyChange on surprisef; print the summary.
lmsurprise <- lm(
  formula = DailyChange ~ surprisef,
  data = emotionsfrequency
)
summary(lmsurprise)
Call:
lm(formula = DailyChange ~ surprisef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075154 -0.005886 0.000335 0.005630 0.078597
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001970 0.001672 1.178 0.241
surprisef -0.004029 0.023518 -0.171 0.864
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0002445, Adjusted R-squared: -0.008087
F-statistic: 0.02935 on 1 and 120 DF, p-value: 0.8643
# Single-predictor fit of DailyChange on lsurprisef; print the summary.
llmsurprise <- lm(
  formula = DailyChange ~ lsurprisef,
  data = emotionsfrequency
)
summary(llmsurprise)
Call:
lm(formula = DailyChange ~ lsurprisef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075138 -0.005857 0.000290 0.005470 0.078335
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0018256 0.0016966 1.076 0.284
lsurprisef 0.0004571 0.0269478 0.017 0.986
Residual standard error: 0.01645 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 2.397e-06, Adjusted R-squared: -0.008331
F-statistic: 0.0002877 on 1 and 120 DF, p-value: 0.9865
# Single-predictor fit of DailyChange on trustf; print the summary.
lmtrust <- lm(
  formula = DailyChange ~ trustf,
  data = emotionsfrequency
)
summary(lmtrust)
Call:
lm(formula = DailyChange ~ trustf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075525 -0.006136 0.000372 0.005620 0.078448
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002441 0.002261 1.080 0.282
trustf -0.007579 0.021436 -0.354 0.724
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.001041, Adjusted R-squared: -0.007284
F-statistic: 0.125 on 1 and 120 DF, p-value: 0.7243
# Single-predictor fit of DailyChange on ltrustf; print the summary.
llmtrust <- lm(
  formula = DailyChange ~ ltrustf,
  data = emotionsfrequency
)
summary(llmtrust)
Call:
lm(formula = DailyChange ~ ltrustf, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075296 -0.006046 0.000305 0.005511 0.078403
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.002091 0.002341 0.893 0.373
ltrustf -0.003384 0.024251 -0.140 0.889
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0001622, Adjusted R-squared: -0.00817
F-statistic: 0.01947 on 1 and 120 DF, p-value: 0.8893
# Single-predictor fit of DailyChange on negativef; print the summary.
lmnegative <- lm(
  formula = DailyChange ~ negativef,
  data = emotionsfrequency
)
summary(lmnegative)
Call:
lm(formula = DailyChange ~ negativef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075138 -0.005861 0.000277 0.005421 0.078337
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0017945 0.0018334 0.979 0.330
negativef 0.0007527 0.0179747 0.042 0.967
Residual standard error: 0.01645 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 1.461e-05, Adjusted R-squared: -0.008319
F-statistic: 0.001753 on 1 and 120 DF, p-value: 0.9667
# Single-predictor fit of DailyChange on lnegativef; print the summary.
llmnegative <- lm(
  formula = DailyChange ~ lnegativef,
  data = emotionsfrequency
)
summary(llmnegative)
Call:
lm(formula = DailyChange ~ lnegativef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.075141 -0.005859 0.000190 0.005298 0.078262
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.001669 0.001881 0.887 0.377
lnegativef 0.003095 0.020845 0.148 0.882
Residual standard error: 0.01644 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.0001837, Adjusted R-squared: -0.008148
F-statistic: 0.02205 on 1 and 120 DF, p-value: 0.8822
# Single-predictor fit of DailyChange on positivef; print the summary.
lmpositive <- lm(
  formula = DailyChange ~ positivef,
  data = emotionsfrequency
)
summary(lmpositive)
Call:
lm(formula = DailyChange ~ positivef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074392 -0.005560 0.000299 0.005273 0.079320
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.000880 0.002621 0.336 0.738
positivef 0.007419 0.016683 0.445 0.657
Residual standard error: 0.01643 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.001645, Adjusted R-squared: -0.006675
F-statistic: 0.1977 on 1 and 120 DF, p-value: 0.6574
# Single-predictor fit of DailyChange on lpositivef; print the summary.
llmpositive <- lm(
  formula = DailyChange ~ lpositivef,
  data = emotionsfrequency
)
summary(llmpositive)
Call:
lm(formula = DailyChange ~ lpositivef, data = emotionsfrequency)
Residuals:
Min 1Q Median 3Q Max
-0.074230 -0.005559 0.000262 0.005377 0.079553
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0006468 0.0027280 0.237 0.813
lpositivef 0.0100556 0.0192826 0.521 0.603
Residual standard error: 0.01643 on 120 degrees of freedom
(4 observations deleted due to missingness)
Multiple R-squared: 0.002261, Adjusted R-squared: -0.006053
F-statistic: 0.2719 on 1 and 120 DF, p-value: 0.603
# Load stargazer and render the multi-predictor model `llm` as a text table,
# printed to the console and also written to the .txt file named in `out`.
library(stargazer)
stargazer(
  llm,
  type = "text",
  title = "Emotion Analysis: Multilinear Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Multilinear Regression Result (Log).txt"
)
Emotion Analysis: Multilinear Regression Result
===============================================
Dependent variable:
---------------------------
DailyChange
-----------------------------------------------
langryf -0.013
(0.037)
lanticipationf 0.062
(0.038)
ldisgustf -0.130**
(0.060)
lfearf 0.005
(0.044)
ljoyf -0.048
(0.047)
lsadnessf 0.008
(0.059)
lsurprisef 0.059
(0.054)
ltrustf -0.023
(0.029)
lnegativef 0.014
(0.037)
lpositivef 0.001
(0.028)
Constant 0.001
(0.003)
-----------------------------------------------
Observations 122
R2 0.080
Adjusted R2 -0.003
Residual Std. Error 0.016 (df = 111)
F Statistic 0.960 (df = 10; 111)
===============================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Render `llm` as a text table again, this time with an `out` path ending in
# .pdf.
# NOTE(review): stargazer picks the file format from the extension only for
# .tex/.txt/.htm/.html and otherwise follows `type`, so this file presumably
# holds plain text rather than a real PDF - confirm whether that is intended.
stargazer(
  llm,
  type = "text",
  title = "Emotion Analysis: Multilinear Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Multilinear Regression Result (Log).pdf"
)
Emotion Analysis: Multilinear Regression Result
===============================================
Dependent variable:
---------------------------
DailyChange
-----------------------------------------------
langryf -0.013
(0.037)
lanticipationf 0.062
(0.038)
ldisgustf -0.130**
(0.060)
lfearf 0.005
(0.044)
ljoyf -0.048
(0.047)
lsadnessf 0.008
(0.059)
lsurprisef 0.059
(0.054)
ltrustf -0.023
(0.029)
lnegativef 0.014
(0.037)
lpositivef 0.001
(0.028)
Constant 0.001
(0.003)
-----------------------------------------------
Observations 122
R2 0.080
Adjusted R2 -0.003
Residual Std. Error 0.016 (df = 111)
F Statistic 0.960 (df = 10; 111)
===============================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Render the multi-predictor model stored in `lm` as a text table rounded to
# one digit, printed to the console and written to the file named in `out`.
library(stargazer)
stargazer(
  lm,
  type = "text",
  title = "Emotion Analysis: Multilinear Regression Result",
  digits = 1,
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis:Multilinear Regression Result.txt"
)
Emotion Analysis: Multilinear Regression Result
===============================================
Dependent variable:
---------------------------
DailyChange
-----------------------------------------------
angryf -0.01
(0.03)
anticipationf 0.1
(0.03)
disgustf -0.1**
(0.1)
fearf 0.003
(0.04)
joyf -0.05
(0.04)
sadnessf 0.01
(0.1)
surprisef 0.1
(0.05)
trustf -0.02
(0.03)
negativef 0.01
(0.03)
positivef 0.004
(0.02)
Constant 0.001
(0.003)
-----------------------------------------------
Observations 122
R2 0.1
Adjusted R2 -0.001
Residual Std. Error 0.02 (df = 111)
F Statistic 1.0 (df = 10; 111)
===============================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the ten single-predictor llm* fits, one column
# per model, printed to the console and written to the file named in `out`.
stargazer(
  llmangry, llmanticipation, llmdisgust, llmfear, llmjoy,
  llmsadness, llmsurprise, llmtrust, llmpositive, llmnegative,
  type = "text",
  title = "Emotion Analysis:Linear Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Linear Regression Result (Log).txt"
)
Emotion Analysis:Linear Regression Result
==============================================================================================================
Dependent variable:
-------------------------------------------------------------------------------
DailyChange
(1) (2) (3) (4) (5) (6) (7) (8) (9) (10)
--------------------------------------------------------------------------------------------------------------
langryf -0.004
(0.029)
lanticipationf 0.040
(0.025)
ldisgustf -0.056
(0.035)
lfearf -0.010
(0.027)
ljoyf 0.008
(0.026)
lsadnessf 0.009
(0.045)
lsurprisef 0.0005
(0.027)
ltrustf -0.003
(0.024)
lpositivef 0.010
(0.019)
lnegativef 0.003
(0.021)
Constant 0.002 -0.0005 0.002 0.002 0.002 0.002 0.002 0.002 0.001 0.002
(0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.003) (0.002)
--------------------------------------------------------------------------------------------------------------
Observations 122 122 122 122 122 122 122 122 122 122
R2 0.0002 0.021 0.021 0.001 0.001 0.0003 0.00000 0.0002 0.002 0.0002
Adjusted R2 -0.008 0.013 0.013 -0.007 -0.008 -0.008 -0.008 -0.008 -0.006 -0.008
Residual Std. Error (df = 120) 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
F Statistic (df = 1; 120) 0.019 2.567 2.599 0.129 0.086 0.041 0.0003 0.019 0.272 0.022
==============================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the ten single-predictor lm* fits, one column
# per model, printed to the console and written to the file named in `out`.
stargazer(
  lmangry, lmanticipation, lmdisgust, lmfear, lmjoy,
  lmsadness, lmsurprise, lmtrust, lmpositive, lmnegative,
  type = "text",
  title = "Emotion Analysis:Linear Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Linear Regression Result.txt"
)
Emotion Analysis:Linear Regression Result
==============================================================================================================
Dependent variable:
-------------------------------------------------------------------------------
DailyChange
(1) (2) (3) (4) (5) (6) (7) (8) (9) (10)
--------------------------------------------------------------------------------------------------------------
angryf -0.002
(0.025)
anticipationf 0.031
(0.022)
disgustf -0.049
(0.029)
fearf -0.012
(0.024)
joyf 0.004
(0.023)
sadnessf 0.007
(0.042)
surprisef -0.004
(0.024)
trustf -0.008
(0.021)
positivef 0.007
(0.017)
negativef 0.001
(0.018)
Constant 0.002 -0.0001 0.002 0.002 0.002 0.002 0.002 0.002 0.001 0.002
(0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.002) (0.003) (0.002)
--------------------------------------------------------------------------------------------------------------
Observations 122 122 122 122 122 122 122 122 122 122
R2 0.00003 0.017 0.022 0.002 0.0003 0.0002 0.0002 0.001 0.002 0.00001
Adjusted R2 -0.008 0.008 0.014 -0.006 -0.008 -0.008 -0.008 -0.007 -0.007 -0.008
Residual Std. Error (df = 120) 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
F Statistic (df = 1; 120) 0.004 2.019 2.742 0.261 0.035 0.029 0.029 0.125 0.198 0.002
==============================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
Prepare corpus for identifying emotions.
For tweets that are associated with daily stock price increase/decrease:
# Split the tweets by the sign of the daily price change: iTesla keeps rows
# with DailyChange > 0, dTesla keeps rows with DailyChange < 0. Rows where the
# comparison is FALSE or NA (e.g. a zero change) land in neither set.
iTesla <- filter(Tesla, DailyChange > 0)
dTesla <- filter(Tesla, DailyChange < 0)
#Increasing daily stock prices
# Build a tm corpus from the tweets in iTesla, clean it with clean_corpus(),
# and compute tf-idf for every (document, term) pair, sorted from highest to
# lowest score.
# Fix: ids and line markers are sized from nrow(iTesla) instead of the
# hard-coded 80. With a fixed length, data.frame() errors - or silently
# recycles tweets when the lengths happen to divide - as soon as the number of
# price-increase rows changes.
doc_id <- seq_len(nrow(iTesla))
line <- rep(1, length(doc_id))
text <- data.frame(doc_id, text = c(iTesla$tweet), line, stringsAsFactors = FALSE)
i_df_source <- DataframeSource(text)
i_df_nrc <- VCorpus(i_df_source)
i_nrc_clean <- clean_corpus(i_df_nrc)
i_nrc_dtm <- DocumentTermMatrix(i_nrc_clean)
i_nrc_m <- as.matrix(i_nrc_dtm)
i_nrc_td <- tidy(i_nrc_dtm)
i_nrc_tf_idf <- i_nrc_td %>%
  bind_tf_idf(term, document, count) %>%
  arrange(desc(tf_idf))
i_nrc_tf_idf
#Decreasing daily stock prices
# Build a tm corpus from the tweets in dTesla, clean it with clean_corpus(),
# and compute tf-idf for every (document, term) pair, sorted from highest to
# lowest score.
# Fix: ids and line markers are sized from nrow(dTesla) instead of the
# hard-coded 46. With a fixed length, data.frame() errors - or silently
# recycles tweets when the lengths happen to divide - as soon as the number of
# price-decrease rows changes.
doc_id <- seq_len(nrow(dTesla))
line <- rep(1, length(doc_id))
text <- data.frame(doc_id, text = c(dTesla$tweet), line, stringsAsFactors = FALSE)
d_df_source <- DataframeSource(text)
d_df_nrc <- VCorpus(d_df_source)
d_nrc_clean <- clean_corpus(d_df_nrc)
d_nrc_dtm <- DocumentTermMatrix(d_nrc_clean)
d_nrc_m <- as.matrix(d_nrc_dtm)
d_nrc_td <- tidy(d_nrc_dtm)
d_nrc_tf_idf <- d_nrc_td %>%
  bind_tf_idf(term, document, count) %>%
  arrange(desc(tf_idf))
d_nrc_tf_idf
Find tf_idf score for angry words used in the tweets.
# tf-idf of the terms listed in nrc_anger$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
ianger <- i_nrc_tf_idf %>%
  filter(term %in% nrc_anger$word) %>%
  select(term, i_tf_idf = tf_idf)
danger <- d_nrc_tf_idf %>%
  filter(term %in% nrc_anger$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
angerwords <- full_join(ianger, danger, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
angerwords
Visualize the relationship between the use of words from the angry category and stock price change.
# Jittered scatter of each anger term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-3.
plot2 <- ggplot(angerwords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(angry words) in tweets associated with stock price increase",
    y = "f(angry words) in tweets associated with stock price decrease",
    title = "Frequency of angry words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5)) +
  scale_y_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5))
# Optional zoom, currently disabled:
#facet_zoom(x=i_tf_idf<=0.5, y=d_tf_idf<=0.5)
plot2
#ggsave('angry diff.png')
Find tf_idf score for anticipation words used in the tweets.
# tf-idf of the terms listed in nrc_anticipation$word, taken separately from
# the price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
ianticipation <- i_nrc_tf_idf %>%
  filter(term %in% nrc_anticipation$word) %>%
  select(term, i_tf_idf = tf_idf)
danticipation <- d_nrc_tf_idf %>%
  filter(term %in% nrc_anticipation$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
anticipationwords <- full_join(ianticipation, danticipation, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
anticipationwords
Visualize the relationship between the use of words from the anticipation category and stock price change.
# Jittered scatter of each anticipation term's tf-idf in price-increase tweets
# (x) against its tf-idf in price-decrease tweets (y); axes limited to 0-3.
plot3 <- ggplot(anticipationwords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(anticipation words) in tweets associated with stock price increase",
    y = "f(anticipation words) in tweets associated with stock price decrease",
    title = "Frequency of anticipation words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5)) +
  scale_y_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5))
plot3
#ggsave('anticipation diff.png')
Find tf_idf score for disgust words used in the tweets.
# tf-idf of the terms listed in nrc_disgust$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
idisgust <- i_nrc_tf_idf %>%
  filter(term %in% nrc_disgust$word) %>%
  select(term, i_tf_idf = tf_idf)
ddisgust <- d_nrc_tf_idf %>%
  filter(term %in% nrc_disgust$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
disgustwords <- full_join(idisgust, ddisgust, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
disgustwords
Visualize the relationship between the use of words from the disgust category and stock price change.
# Jittered scatter of each disgust term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.
plot4 <- ggplot(disgustwords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(disgust words) in tweets associated with stock price increase",
    y = "f(disgust words) in tweets associated with stock price decrease",
    title = "Frequency of disgust words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5))
plot4
#ggsave('disgust diff.png')
Find tf_idf score for fear words used in the tweets.
# tf-idf of the terms listed in nrc_fear$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
ifear <- i_nrc_tf_idf %>%
  filter(term %in% nrc_fear$word) %>%
  select(term, i_tf_idf = tf_idf)
dfear <- d_nrc_tf_idf %>%
  filter(term %in% nrc_fear$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
fearwords <- full_join(ifear, dfear, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
fearwords
Visualize the relationship between the use of words from the fear category and stock price change.
# Jittered scatter of each fear term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.
plot5 <- ggplot(fearwords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(fear words) in tweets associated with stock price increase",
    y = "f(fear words) in tweets associated with stock price decrease",
    title = "Frequency of fear words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5))
plot5
#ggsave('fear diff.png')
Find tf_idf score for joy words used in the tweets.
# tf-idf of the terms listed in nrc_joy$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
ijoy <- i_nrc_tf_idf %>%
  filter(term %in% nrc_joy$word) %>%
  select(term, i_tf_idf = tf_idf)
djoy <- d_nrc_tf_idf %>%
  filter(term %in% nrc_joy$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
joywords <- full_join(ijoy, djoy, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
joywords
Visualize the relationship between the use of words from the joy category and stock price change.
# Jittered scatter of each joy term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-3.
plot6 <- ggplot(joywords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(joy words) in tweets associated with stock price increase",
    y = "f(joy words) in tweets associated with stock price decrease",
    title = "Frequency of joy words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5)) +
  scale_y_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5))
plot6
#ggsave('joy diff.png')
Find tf_idf score for sadness words used in the tweets.
# tf-idf of the terms listed in nrc_sadness$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
isadness <- i_nrc_tf_idf %>%
  filter(term %in% nrc_sadness$word) %>%
  select(term, i_tf_idf = tf_idf)
dsadness <- d_nrc_tf_idf %>%
  filter(term %in% nrc_sadness$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
sadnesswords <- full_join(isadness, dsadness, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
sadnesswords
Visualize the relationship between the use of words from the sadness category and stock price change.
# Jittered scatter of each sadness term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.5.
plot7 <- ggplot(sadnesswords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(sadness words) in tweets associated with stock price increase",
    y = "f(sadness words) in tweets associated with stock price decrease",
    title = "Frequency of sadness words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2.5), breaks = seq(0, 2.5, 0.5)) +
  scale_y_continuous(limits = c(0, 2.5), breaks = seq(0, 2.5, 0.5))
plot7
#ggsave('sadness diff.png')
Find tf_idf score for surprise words used in the tweets.
# tf-idf of the terms listed in nrc_surprise$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
isurprise <- i_nrc_tf_idf %>%
  filter(term %in% nrc_surprise$word) %>%
  select(term, i_tf_idf = tf_idf)
dsurprise <- d_nrc_tf_idf %>%
  filter(term %in% nrc_surprise$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
surprisewords <- full_join(isurprise, dsurprise, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
surprisewords
Visualize the relationship between the use of words from the surprise category and stock price change.
# Jittered scatter of each surprise term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.
plot8 <- ggplot(surprisewords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(surprise words) in tweets associated with stock price increase",
    y = "f(surprise words) in tweets associated with stock price decrease",
    title = "Frequency of surprise words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5))
plot8
#ggsave('surprise diff.png')
Find tf_idf score for trust words used in the tweets.
# tf-idf of the terms listed in nrc_trust$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
itrust <- i_nrc_tf_idf %>%
  filter(term %in% nrc_trust$word) %>%
  select(term, i_tf_idf = tf_idf)
dtrust <- d_nrc_tf_idf %>%
  filter(term %in% nrc_trust$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
trustwords <- full_join(itrust, dtrust, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
trustwords
Visualize the relationship between the use of words from the trust category and stock price change.
# Jittered scatter of each trust term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.
plot9 <- ggplot(trustwords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(trust words) in tweets associated with stock price increase",
    y = "f(trust words) in tweets associated with stock price decrease",
    title = "Frequency of trust words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5))
plot9
#ggsave('trust diff.png')
Find tf_idf score for negative words used in the tweets.
# tf-idf of the terms listed in nrc_negative$word, taken separately from the
# price-increase table (i_tf_idf) and the price-decrease table (d_tf_idf).
inegative <- i_nrc_tf_idf %>%
  filter(term %in% nrc_negative$word) %>%
  select(term, i_tf_idf = tf_idf)
dnegative <- d_nrc_tf_idf %>%
  filter(term %in% nrc_negative$word) %>%
  select(term, d_tf_idf = tf_idf)
# Keep every term found on either side; a side with no match scores 0.
# NOTE(review): a term can occur once per tweet in each table, so joining by
# term may pair rows many-to-many - confirm that is intended.
negativewords <- full_join(inegative, dnegative, by = "term") %>%
  mutate(
    i_tf_idf = replace(i_tf_idf, is.na(i_tf_idf), 0),
    d_tf_idf = replace(d_tf_idf, is.na(d_tf_idf), 0)
  )
negativewords
Visualize the relationship between the use of words from the negative category and stock price change.
# Jittered scatter of each negative term's tf-idf in price-increase tweets (x)
# against its tf-idf in price-decrease tweets (y); both axes limited to 0-2.
plot10 <- ggplot(negativewords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  labs(
    x = "f(negative words) in tweets associated with stock price increase",
    y = "f(negative words) in tweets associated with stock price decrease",
    title = "Frequency of negative words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  scale_x_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5)) +
  scale_y_continuous(limits = c(0, 2), breaks = seq(0, 2, 0.5))
plot10
#ggsave('negative diff.png')
Find the tf-idf scores of the positive words used in the tweets.
# tf-idf of terms listed in nrc_positive$word, taken from i_nrc_tf_idf and
# d_nrc_tf_idf (presumably the increase / decrease tweet sets, per the plot
# labels used for these columns). The score column is renamed while selecting.
ipositive <- i_nrc_tf_idf %>%
  filter(term %in% nrc_positive$word) %>%
  select(term, i_tf_idf = tf_idf)
dpositive <- d_nrc_tf_idf %>%
  filter(term %in% nrc_positive$word) %>%
  select(term, d_tf_idf = tf_idf)

# Full join keeps terms found in either table; a missing score becomes 0.
positivewords <- full_join(ipositive, dpositive, by = "term") %>%
  mutate(
    i_tf_idf = coalesce(i_tf_idf, 0),
    d_tf_idf = coalesce(d_tf_idf, 0)
  )
positivewords
Visualize the relationship between the use of words from the positive category and stock price change.
# Jittered scatter of positive-word tf-idf: i_tf_idf on x against d_tf_idf on y.
# Both axes are fixed to [0, 3]; values outside the scale limits are not drawn.
plot11 <- ggplot(positivewords, aes(x = i_tf_idf, y = d_tf_idf)) +
  geom_jitter(color = "#00CED1") +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5)) +
  scale_y_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.5)) +
  labs(
    x = "f(positive words) in tweets associated with stock price increase",
    y = "f(positive words) in tweets associated with stock price decrease",
    title = "Frequency of positive words in tweets associated with stock price increase/decrease"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
plot11
#ggsave('positive diff.png')
Logistic Regression
# Encode the sign of DailyChange as a factor: 1 (> 0), 0 (== 0), -1 (< 0).
# Rows where DailyChange is NA match no branch and stay NA.
# NOTE(review): with a factor response, binomial glm() treats the first level
# as failure and every other level as success, so any 0 rows would be pooled
# with the 1 rows in the models fitted below -- confirm that is intended.
emotionsfrequency$Direction <- as.factor(case_when(
  emotionsfrequency$DailyChange > 0 ~ 1,
  emotionsfrequency$DailyChange == 0 ~ 0,
  emotionsfrequency$DailyChange < 0 ~ -1
))
emotionsfrequency
# Logistic model of Direction on all ten emotion/sentiment frequency columns.
# NOTE(review): Direction is a factor; binomial glm() models "not the first
# level", so confirm the level set is what the analysis expects.
logit <- glm(
  formula = Direction ~ angryf + anticipationf + disgustf + fearf + joyf +
    sadnessf + surprisef + trustf + negativef + positivef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logit)
Call:
glm(formula = Direction ~ angryf + anticipationf + disgustf +
fearf + joyf + sadnessf + surprisef + trustf + negativef +
positivef, family = binomial(link = "logit"), data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3098 -1.2078 0.7149 0.9781 1.4492
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.01673 0.43064 -0.039 0.9690
angryf 5.29635 4.64536 1.140 0.2542
anticipationf -1.80057 4.56033 -0.395 0.6930
disgustf -7.02682 7.42839 -0.946 0.3442
fearf -4.87369 5.31455 -0.917 0.3591
joyf -10.99557 6.02581 -1.825 0.0680 .
sadnessf 5.32096 7.75805 0.686 0.4928
surprisef 12.01708 6.88109 1.746 0.0807 .
trustf 1.64431 3.75967 0.437 0.6619
negativef -2.78715 4.59601 -0.606 0.5442
positivef 6.67718 3.41520 1.955 0.0506 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 150.91 on 111 degrees of freedom
(4 observations deleted due to missingness)
AIC: 172.91
Number of Fisher Scoring iterations: 4
# Print the full model as a text table and write the same table to `out`.
# NOTE(review): `out` is an absolute, user-specific path; it only resolves on
# the original author's machine.
stargazer(
  logit,
  type = "text",
  title = "Emotion Analysis: Multi-logistic Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Multi-logistic Regression Result.txt"
)
Emotion Analysis: Multi-logistic Regression Result
=============================================
Dependent variable:
---------------------------
Direction
---------------------------------------------
angryf 5.296
(4.645)
anticipationf -1.801
(4.560)
disgustf -7.027
(7.428)
fearf -4.874
(5.315)
joyf -10.996*
(6.026)
sadnessf 5.321
(7.758)
surprisef 12.017*
(6.881)
trustf 1.644
(3.760)
negativef -2.787
(4.596)
positivef 6.677*
(3.415)
Constant -0.017
(0.431)
---------------------------------------------
Observations 122
Log Likelihood -75.453
Akaike Inf. Crit. 172.905
=============================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Same ten-predictor logistic model, using the l-prefixed columns
# (presumably the log-transformed frequencies -- confirm where they are built).
llogit <- glm(
  formula = Direction ~ langryf + lanticipationf + ldisgustf + lfearf + ljoyf +
    lsadnessf + lsurprisef + ltrustf + lpositivef + lnegativef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogit)
Call:
glm(formula = Direction ~ langryf + lanticipationf + ldisgustf +
lfearf + ljoyf + lsadnessf + lsurprisef + ltrustf + lpositivef +
lnegativef, family = binomial(link = "logit"), data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3249 -1.2087 0.7374 0.9890 1.4407
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.04333 0.44553 -0.097 0.9225
langryf 5.64730 5.06677 1.115 0.2650
lanticipationf -2.37994 4.90886 -0.485 0.6278
ldisgustf -7.20911 8.11123 -0.889 0.3741
lfearf -5.45094 5.79439 -0.941 0.3468
ljoyf -11.35046 6.49781 -1.747 0.0807 .
lsadnessf 4.86229 7.98701 0.609 0.5427
lsurprisef 12.81309 7.39783 1.732 0.0833 .
ltrustf 2.32667 4.22383 0.551 0.5817
lpositivef 7.34769 3.84762 1.910 0.0562 .
lnegativef -2.87934 5.00563 -0.575 0.5651
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 151.42 on 111 degrees of freedom
(4 observations deleted due to missingness)
AIC: 173.42
Number of Fisher Scoring iterations: 4
# Print the l-prefixed full model as a text table and write it to `out`.
# NOTE(review): the title is identical to the table for `logit`; only the
# output file name carries the "(Log)" marker. `out` is an absolute,
# user-specific path.
stargazer(
  llogit,
  type = "text",
  title = "Emotion Analysis: Multi-logistic Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Multi-logistic Regression Result (Log).txt"
)
Emotion Analysis: Multi-logistic Regression Result
=============================================
Dependent variable:
---------------------------
Direction
---------------------------------------------
langryf 5.647
(5.067)
lanticipationf -2.380
(4.909)
ldisgustf -7.209
(8.111)
lfearf -5.451
(5.794)
ljoyf -11.350*
(6.498)
lsadnessf 4.862
(7.987)
lsurprisef 12.813*
(7.398)
ltrustf 2.327
(4.224)
lpositivef 7.348*
(3.848)
lnegativef -2.879
(5.006)
Constant -0.043
(0.446)
---------------------------------------------
Observations 122
Log Likelihood -75.710
Akaike Inf. Crit. 173.421
=============================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Single-predictor logistic model: Direction on angryf.
logitangry <- glm(
  formula = Direction ~ angryf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitangry)
Call:
glm(formula = Direction ~ angryf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6385 -1.3782 0.9384 0.9891 0.9891
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.4606 0.2103 2.190 0.0285 *
angryf 2.8950 3.8212 0.758 0.4487
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 159.97 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 163.97
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on langryf
# (presumably the log-transformed angryf -- confirm).
llogitangry <- glm(
  formula = Direction ~ langryf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitangry)
Call:
glm(formula = Direction ~ langryf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6289 -1.3782 0.9367 0.9891 0.9891
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.4606 0.2126 2.167 0.0302 *
langryf 3.0586 4.2034 0.728 0.4668
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.04 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.04
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on anticipationf.
logitanticipation <- glm(
  formula = Direction ~ anticipationf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitanticipation)
Call:
glm(formula = Direction ~ anticipationf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4421 -1.4099 0.9341 0.9640 1.0490
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.6036 0.2538 2.378 0.0174 *
anticipationf -1.0774 2.7472 -0.392 0.6949
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.48 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.48
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lanticipationf
# (presumably the log-transformed anticipationf -- confirm).
llogitanticipation <- glm(
  formula = Direction ~ lanticipationf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitanticipation)
Call:
glm(formula = Direction ~ lanticipationf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4346 -1.4097 0.9404 0.9635 1.0211
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5869 0.2621 2.239 0.0251 *
lanticipationf -0.8609 3.1441 -0.274 0.7842
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.56 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.56
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on disgustf.
logitdisgust <- glm(
  formula = Direction ~ disgustf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitdisgust)
Call:
glm(formula = Direction ~ disgustf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4294 -1.4294 0.9448 0.9448 1.1855
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5754 0.1943 2.961 0.00306 **
disgustf -2.9723 3.9005 -0.762 0.44603
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.00 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on ldisgustf
# (presumably the log-transformed disgustf -- confirm).
llogitdisgust <- glm(
  formula = Direction ~ ldisgustf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitdisgust)
Call:
glm(formula = Direction ~ ldisgustf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4292 -1.4292 0.9449 0.9449 1.1806
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5749 0.1953 2.944 0.00324 **
ldisgustf -3.1942 4.5120 -0.708 0.47898
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.11 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.11
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on fearf.
logitfear <- glm(
  formula = Direction ~ fearf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitfear)
Call:
glm(formula = Direction ~ fearf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4421 -1.4137 0.9340 0.9528 1.1274
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.6037 0.2181 2.767 0.00565 **
fearf -1.7782 2.9283 -0.607 0.54369
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.27 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.27
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lfearf
# (presumably the log-transformed fearf -- confirm).
llogitfear <- glm(
  formula = Direction ~ lfearf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitfear)
Call:
glm(formula = Direction ~ lfearf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4404 -1.4123 0.9354 0.9541 1.1070
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5999 0.2224 2.698 0.00698 **
lfearf -1.7908 3.3554 -0.534 0.59353
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.35 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.35
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on joyf.
logitjoy <- glm(
  formula = Direction ~ joyf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitjoy)
Call:
glm(formula = Direction ~ joyf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4558 -1.4104 0.9225 0.9605 1.1476
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.6343 0.2311 2.745 0.00605 **
joyf -2.0669 2.8293 -0.731 0.46507
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.10 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.1
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on ljoyf
# (presumably the log-transformed joyf -- confirm).
llogitjoy <- glm(
  formula = Direction ~ ljoyf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitjoy)
Call:
glm(formula = Direction ~ ljoyf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4502 -1.4090 0.9272 0.9617 1.1106
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.6216 0.2368 2.625 0.00866 **
ljoyf -1.9173 3.2332 -0.593 0.55319
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.29 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.29
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on sadnessf.
logitsadness <- glm(
  formula = Direction ~ sadnessf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitsadness)
Call:
glm(formula = Direction ~ sadnessf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4124 -1.4123 0.9593 0.9594 0.9597
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.537309 0.222767 2.412 0.0159 *
sadnessf -0.007262 5.264444 -0.001 0.9989
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.64 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.64
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lsadnessf
# (presumably the log-transformed sadnessf -- confirm).
llogitsadness <- glm(
  formula = Direction ~ lsadnessf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitsadness)
Call:
glm(formula = Direction ~ lsadnessf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4408 -1.4092 0.9569 0.9621 0.9621
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5302 0.2246 2.360 0.0183 *
lsadnessf 0.3162 5.6432 0.056 0.9553
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.63 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.63
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on surprisef.
logitsurprise <- glm(
  formula = Direction ~ surprisef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitsurprise)
Call:
glm(formula = Direction ~ surprisef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4135 -1.4127 0.9584 0.9591 0.9644
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.53963 0.21068 2.561 0.0104 *
surprisef -0.07691 2.95653 -0.026 0.9792
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.63 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.63
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lsurprisef
# (presumably the log-transformed surprisef -- confirm).
llogitsurprise <- glm(
  formula = Direction ~ lsurprisef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitsurprise)
Call:
glm(formula = Direction ~ lsurprisef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4838 -1.4066 0.9538 0.9643 0.9643
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5244 0.2138 2.453 0.0142 *
lsurprisef 0.4246 3.4371 0.124 0.9017
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.62 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.62
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on surprisef.
# NOTE(review): this repeats the logitsurprise fit that already appears
# earlier in the file with the same formula and data; the refit yields the
# same object and looks like a copy-paste leftover.
logitsurprise <- glm(
  formula = Direction ~ surprisef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitsurprise)
Call:
glm(formula = Direction ~ surprisef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4135 -1.4127 0.9584 0.9591 0.9644
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.53963 0.21068 2.561 0.0104 *
surprisef -0.07691 2.95653 -0.026 0.9792
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.63 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.63
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lsurprisef.
# NOTE(review): this repeats the llogitsurprise fit that already appears
# earlier in the file with the same formula and data; the refit yields the
# same object and looks like a copy-paste leftover.
llogitsurprise <- glm(
  formula = Direction ~ lsurprisef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitsurprise)
Call:
glm(formula = Direction ~ lsurprisef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4838 -1.4066 0.9538 0.9643 0.9643
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5244 0.2138 2.453 0.0142 *
lsurprisef 0.4246 3.4371 0.124 0.9017
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.62 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.62
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on trustf.
logittrust <- glm(
  formula = Direction ~ trustf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logittrust)
Call:
glm(formula = Direction ~ trustf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7021 -1.3718 0.9348 0.9661 1.0155
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.3936 0.2869 1.372 0.170
trustf 1.8369 2.8227 0.651 0.515
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.20 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.2
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on ltrustf
# (presumably the log-transformed trustf -- confirm).
llogittrust <- glm(
  formula = Direction ~ ltrustf,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogittrust)
Call:
glm(formula = Direction ~ ltrustf, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7287 -1.3627 0.9272 0.9659 1.0305
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.3558 0.2963 1.201 0.230
ltrustf 2.4791 3.1917 0.777 0.437
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.01 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.01
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on negativef.
logitnegative <- glm(
  formula = Direction ~ negativef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitnegative)
Call:
glm(formula = Direction ~ negativef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4200 -1.4105 0.9528 0.9587 1.0087
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5543 0.2311 2.399 0.0165 *
negativef -0.2870 2.2472 -0.128 0.8984
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.62 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.62
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lnegativef
# (presumably the log-transformed negativef -- confirm).
llogitnegative <- glm(
  formula = Direction ~ lnegativef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitnegative)
Call:
glm(formula = Direction ~ lnegativef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.4186 -1.4105 0.9540 0.9591 0.9936
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5510 0.2371 2.324 0.0201 *
lnegativef -0.2516 2.6136 -0.096 0.9233
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 160.63 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 164.63
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on positivef.
logitpositive <- glm(
  formula = Direction ~ positivef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(logitpositive)
Call:
glm(formula = Direction ~ positivef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7453 -1.3783 0.9083 0.9725 1.0591
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.2848 0.3308 0.861 0.389
positivef 1.9847 2.1788 0.911 0.362
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 159.78 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 163.78
Number of Fisher Scoring iterations: 4
# Single-predictor logistic model: Direction on lpositivef
# (presumably the log-transformed positivef -- confirm).
llogitpositive <- glm(
  formula = Direction ~ lpositivef,
  family = binomial(link = "logit"),
  data = emotionsfrequency
)
summary(llogitpositive)
Call:
glm(formula = Direction ~ lpositivef, family = binomial(link = "logit"),
data = emotionsfrequency)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7351 -1.3774 0.9015 0.9711 1.0742
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.2477 0.3430 0.722 0.470
lpositivef 2.4826 2.5023 0.992 0.321
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 160.64 on 121 degrees of freedom
Residual deviance: 159.63 on 120 degrees of freedom
(4 observations deleted due to missingness)
AIC: 163.63
Number of Fisher Scoring iterations: 4
# Side-by-side text table of the ten single-predictor models, one column per
# model in the order listed; the table is also written to `out`.
# NOTE(review): `out` is an absolute, user-specific path.
stargazer(
  logitangry,
  logitanticipation,
  logitdisgust,
  logitfear,
  logitjoy,
  logitsadness,
  logitsurprise,
  logittrust,
  logitpositive,
  logitnegative,
  type = "text",
  title = "Emotion Analysis: Logistic Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Logistic Regression Result.txt"
)
Emotion Analysis: Logistic Regression Result
====================================================================================================
Dependent variable:
----------------------------------------------------------------------------------
Direction
(1) (2) (3) (4) (5) (6) (7) (8) (9) (10)
----------------------------------------------------------------------------------------------------
angryf 2.895
(3.821)
anticipationf -1.077
(2.747)
disgustf -2.972
(3.900)
fearf -1.778
(2.928)
joyf -2.067
(2.829)
sadnessf -0.007
(5.264)
surprisef -0.077
(2.957)
trustf 1.837
(2.823)
positivef 1.985
(2.179)
negativef -0.287
(2.247)
Constant 0.461** 0.604** 0.575*** 0.604*** 0.634*** 0.537** 0.540** 0.394 0.285 0.554**
(0.210) (0.254) (0.194) (0.218) (0.231) (0.223) (0.211) (0.287) (0.331) (0.231)
----------------------------------------------------------------------------------------------------
Observations 122 122 122 122 122 122 122 122 122 122
Log Likelihood -79.984 -80.241 -79.998 -80.133 -80.049 -80.318 -80.317 -80.099 -79.891 -80.310
Akaike Inf. Crit. 163.967 164.483 163.997 164.266 164.098 164.635 164.635 164.198 163.782 164.619
====================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the ten single-predictor models fitted on the
# l-prefixed columns, one column per model in the order listed; the table is
# also written to `out`.
# NOTE(review): the title matches the table for the non-l models; only the
# output file name carries "(Log)". `out` is an absolute, user-specific path.
stargazer(
  llogitangry,
  llogitanticipation,
  llogitdisgust,
  llogitfear,
  llogitjoy,
  llogitsadness,
  llogitsurprise,
  llogittrust,
  llogitpositive,
  llogitnegative,
  type = "text",
  title = "Emotion Analysis: Logistic Regression Result",
  out = "/Users/annie/Desktop/Columbia Fall2021/Columbia Course Fall 2021/GR5067 NLP/Group Project/Tesla/Emotion Analysis: Logistic Regression Result (Log).txt"
)
Emotion Analysis: Logistic Regression Result
====================================================================================================
Dependent variable:
----------------------------------------------------------------------------------
Direction
(1) (2) (3) (4) (5) (6) (7) (8) (9) (10)
----------------------------------------------------------------------------------------------------
langryf 3.059
(4.203)
lanticipationf -0.861
(3.144)
ldisgustf -3.194
(4.512)
lfearf -1.791
(3.355)
ljoyf -1.917
(3.233)
lsadnessf 0.316
(5.643)
lsurprisef 0.425
(3.437)
ltrustf 2.479
(3.192)
lpositivef 2.483
(2.502)
lnegativef -0.252
(2.614)
Constant 0.461** 0.587** 0.575*** 0.600*** 0.622*** 0.530** 0.524** 0.356 0.248 0.551**
(0.213) (0.262) (0.195) (0.222) (0.237) (0.225) (0.214) (0.296) (0.343) (0.237)
----------------------------------------------------------------------------------------------------
Observations 122 122 122 122 122 122 122 122 122 122
Log Likelihood -80.021 -80.280 -80.053 -80.176 -80.143 -80.316 -80.310 -80.006 -79.814 -80.313
Akaike Inf. Crit. 164.043 164.561 164.105 164.352 164.285 164.632 164.620 164.011 163.628 164.626
====================================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01